{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"Analysis script for analyzing standard experiment, adversarial attacks and random attacks.\n",
    "First must perform experiments, where the result files are saved under the path \"results/{}_{}\"\".format(exp_id, date_time)\n",
    "\"\"\"\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pylab as plt\n",
    "import pickle\n",
    "from IPython.display import display, HTML\n",
    "pd.options.display.max_rows = 1500\n",
    "pd.options.display.max_columns = 200\n",
    "pd.options.display.width = 1000\n",
    "\n",
    "import sys, os\n",
    "sys.path.append(os.path.join(os.path.dirname(\"__file__\"), '..'))\n",
    "sys.path.append(os.path.join(os.path.dirname(\"__file__\"), '..', '..'))\n",
    "from GIB.experiments.GIB_node_model import get_evasive_dict, get_data, remove_edge_random, add_random_edge\n",
    "from GIB.pytorch_net.util import to_np_array, to_Variable, filter_filename, make_dir\n",
    "from GIB.util import parse_filename, GIB_PATH"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Helper functions:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_dfs(exp_ids, date_time, is_adversarial, include=None):\n",
    "    \"\"\"Get the combined DataFrames for a list of exp_ids, under results folders \"results/{}_{}\".format(exp_id, date_time).\n",
    "    \n",
    "    Args:\n",
    "        exp_ids: a list of exp_id set for the experiments.\n",
    "        date_time: month and date of conducting the experiment.\n",
    "        is_adversarial: if True, the experiment is adversarial attack experiment (run by GIB_node_attack_exp).\n",
    "            If False, the experiment is standard experiment (run by GIB_node_exp).\n",
    "        include: strings much be included for the result files selected for analysis. Default None for no filtering.\n",
    "    \n",
    "    Returns:\n",
    "        df: a Pandas DataFrame containing all the results.\n",
    "    \"\"\"\n",
    "    df = None\n",
    "    for exp_id in exp_ids:\n",
    "        dirname = GIB_PATH + \"/{}_{}/\".format(exp_id, date_time)\n",
    "        # Get individual df from each exp_id:\n",
    "        if is_adversarial:\n",
    "            df_ele = get_df_adversarial(dirname, include=include)\n",
    "        else:\n",
    "            df_ele = get_df_standard(dirname, include=include)\n",
    "        # Combine the dfs into a single df:\n",
    "        if df is None:\n",
    "            df = df_ele\n",
    "        else:\n",
    "            df = pd.concat([df, df_ele])\n",
    "    return df\n",
    "\n",
    "\n",
    "def get_df_adversarial(dirname, include=None, num_attacked=None):\n",
    "    \"\"\"Get the combined DataFrames for a list of exp_ids for attack experiments, under results folders \"results/{}_{}\".format(exp_id, date_time).\"\"\"\n",
    "    if include is None:\n",
    "        include = []\n",
    "    filenames = filter_filename(dirname, include=include)\n",
    "    df_dict_list = []\n",
    "    for i, filename in enumerate(filenames):\n",
    "        if not filename.endswith(\".p\"):\n",
    "            continue\n",
    "        df_dict = {}\n",
    "        baseline = True if \"RGCN\" in filename or \"GCNJaccard\" in filename else False  # The models using DeepRobust should set baseline=True\n",
    "        df_dict.update(parse_filename(filename, is_adversarial=True, baseline=baseline))\n",
    "        try:\n",
    "            data_record = pickle.load(open(dirname + filename, \"rb\"))\n",
    "        except:\n",
    "            print(\"Unable to load {}\".format(filename))\n",
    "            continue\n",
    "        for metric in ['classification_margins_clean_best', 'classification_margins_evasive_best', 'classification_margins_attacked_best']:\n",
    "            metric_list = []\n",
    "            for i, (key, item) in enumerate(data_record.items()):\n",
    "                if num_attacked is not None:\n",
    "                    if i >= num_attacked:\n",
    "                        break\n",
    "                if key in [\"params\", \"models_before\", \"model_dict\", \"best_model_dict\",\"num_layers\", \"best_epoch\"]:\n",
    "                    continue\n",
    "                metric_core = metric[:-5] if baseline else metric  # For baseline models by DeepRobust, the models saved are the best validation model\n",
    "                if metric_core not in item:\n",
    "                    metric_list.append(np.NaN)\n",
    "                else:\n",
    "                    metric_list.append(np.mean(item[metric_core] > 0))\n",
    "                \n",
    "            df_dict[metric] = np.mean(metric_list)\n",
    "        df_dict[\"num_attacked\"] = len(data_record) if num_attacked is None else num_attacked\n",
    "        df_dict_list.append(df_dict)\n",
    "    df = pd.DataFrame(df_dict_list)\n",
    "    return df\n",
    "\n",
    "\n",
    "def get_df_standard(dirname, include=None):\n",
    "    \"\"\"Get the combined DataFrames for a list of exp_ids for standard experiments, under results folders \"results/{}_{}\".format(exp_id, date_time).\"\"\"\n",
    "    if include is None:\n",
    "        include = []\n",
    "    filenames = filter_filename(dirname, include=include)\n",
    "    df_dict_list = []\n",
    "    for i, filename in enumerate(filenames):\n",
    "        if not filename.endswith(\".p\"):\n",
    "            continue\n",
    "        df_dict = {}\n",
    "        df_dict.update(parse_filename(filename, is_adversarial=False))\n",
    "        try:\n",
    "            data_record = pickle.load(open(dirname + filename, \"rb\"))\n",
    "        except:\n",
    "            print(\"{} not loaded.\".format(filename))\n",
    "        for key, item in data_record.items():\n",
    "            if key not in [\"model_dict\", \"best_model_dict\",\"num_layers\", \"best_epoch\"]:\n",
    "                df_dict[key] = item[-1]\n",
    "        df_dict_list.append(df_dict)\n",
    "    df = pd.DataFrame(df_dict_list)\n",
    "    return df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Analyze standard experiment:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "exp_ids = [\"node1.0\"]   # Here pass in the list of exp_id that you want to analyze. This exp_id correponds to the \"exp_id\" set in \"GIB_node_attack_exp\"\n",
    "date_time = \"4-6\"       # Month and date that you conduct the experiment. Will access the result files under \"results/{}_{}\".format(exp_id, date_time)\n",
    "df_standard = get_dfs(exp_ids, date_time, is_adversarial=False)  # Obtain combined DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Group the dataframe by experimental settings:\n",
    "dff = df_standard.groupby(by=[\"data_type\",\"model_type\", \"beta1\",\"beta2\",\"num_layers\",\"struct_dropout_mode\"]).count()[[\"b_test_f1_micro\", \"seed\"]]\n",
    "dff.unstack(-1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Analyze adversarial attacks:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "exp_ids = [\"Cora-GIB-Bern\"]   # Here pass in the list of exp_id that you want to analyze. This exp_id correponds to the \"exp_id\" set in \"GIB_node_attack_exp\"\n",
    "date_time = \"4-6\"             # Month and date that you conduct the experiment. Will access the result files under \"results/{}_{}\".format(exp_id, date_time)\n",
    "df = get_dfs(exp_ids, date_time, is_adversarial=True)  # Obtain combined DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Group the dataframe by experimental settings:\n",
    "metrics = ['classification_margins_evasive_best', 'classification_margins_attacked_best', 'classification_margins_clean_best', 'num_attacked']\n",
    "dff = df.groupby(by = [\"data_type\", \"direct_attack\", \"model_type\", \"n_perturbations\"]).mean()[metrics]\n",
    "dff.unstack(level=-1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Perform evasive random attacks and analyze:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "exp_id = \"node1.0\"\n",
    "date_time = \"4-6\"\n",
    "dirname = GIB_PATH + \"/{}_{}/\".format(exp_id, date_time)\n",
    "filenames = sorted(filter_filename(dirname, include=[\"ed_0.0\"]))\n",
    "\n",
    "# For each file specified in filenames, perform evaluation on the randomly corrupted (feature with additive Gaussian noise) graph with varying corruption intensity.\n",
    "df_dict_GIBD_feature = []\n",
    "for i, filename in enumerate(filenames):\n",
    "    print(\"Attacking the {}th model\".format(i))\n",
    "    df_dict_GIBD_feature += get_evasive_dict(\n",
    "        dirname,\n",
    "        filename,\n",
    "        perturb_mode=\"feature\",   # Corrupt type. Here since it is feature additive noise, use \"feature\".\n",
    "        feature_perturb_target=[0.5, 1., 1.5],   # Feature noise ratios\n",
    "        n_repeats=5,              # Number of feature attacks per seed. Larger n_repeats results in less variation in the mean.\n",
    "        device_name=\"cuda:0\" if torch.cuda.is_available() else \"cpu\",\n",
    "        verbose=False,\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_GIBD_features = pd.DataFrame(df_dict_GIBD_feature)\n",
    "dff = df_GIBD_features\n",
    "dff = dff.groupby(by=[\"data_type\",\"model_type\",\"struct_dropout_mode\",\"beta1\", \"beta2\",\"reparam_mode\",\"feature_noise_ratio_evasive\"]).mean()[[\"test_f1_micro_evasive_best\"]]\n",
    "dff_GIBD_features = dff.unstack(-1).round(3) * 100\n",
    "dff_GIBD_features"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python base",
   "language": "python",
   "name": "base"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
